# tabNA.R
# created 2012 June 10
#
# Function that helps to examine the amount of missingness in a given 
# variable.

tabNA <- function (
  x, 
  missing.codes    = NULL, 
  NA.codes         = NULL, 
  show.educ        = FALSE, 
  mid.educ.varname = 'HSgrad',
  yearFacName      = 'year.fac',
  envir            = parent.frame()) { 
  # Tabulates the proportion of missing values in x, by year and (optionally)
  # by a three-level education grouping.  Returns the result of tapply(),
  # rounded to two decimal places: a one-dimensional array indexed by year
  # when show.educ is FALSE, or a year-by-education matrix ('lo', 'med',
  # 'hi') when show.educ is TRUE.
  #
  # "x" is the variable to examine.  It must have the same length as the
  # year factor (and as the education variables, if show.educ is TRUE).
  #
  # "missing.codes" indicates which values of x are missing data -- values
  # that ideally would have been observed given the study design, but
  # weren't observed, perhaps because the respondent couldn't be located.
  # It is normally a character string holding an R expression (for example
  # "c(8, 9)"), which is evaluated to obtain the codes.  A non-character
  # vector of codes is also accepted and is used as given.
  #
  # "NA.codes" is a character string indicating which values of x are "not
  # applicable."  These values are assigned to subjects who were never even
  # assigned to be asked a question.  (This occurs for years in which a
  # question wasn't asked, and in split-ballot studies.)  These values are
  # recoded to NA with car::recode() before anything is tabulated.
  #
  # If missing.codes is NULL, the calculations are simple.  We just assume
  # that data are missing if and only if they are NA: the result in each
  # cell is (number of NA values) / (number of values).
  #
  # If missing.codes is not NULL, we omit from consideration all NA values.
  # The result in each cell is the proportion of non-NA values that ARE in
  # missing.codes.  A cell with no non-NA values yields NaN (0 / 0).
  #
  # "show.educ": if TRUE, the table is also broken down by education, built
  # from the logical variable named by "mid.educ.varname" and the logical
  # variable "college".
  #
  # "yearFacName" is the name of the year factor to tabulate by.
  #
  # "envir" stipulates where the function should look for the other
  # variables that it needs: the year factor, the mid-education variable,
  # and college.  All three are looked up with get(), so enclosing
  # environments of envir are searched as well.  In the future, these
  # variables should just be passed directly to the function.
  
  year.fac <- get(yearFacName, envir = envir)
  if (length(year.fac) == 0) {
    stop("Length of the year.fac variable is 0.")
  }
  
  if (is.character(missing.codes)) {
    # The string form is turned into the actual vector of codes.
    # NOTE(review): eval(parse()) executes arbitrary code; only pass trusted
    # strings here.
    missing.codes <- eval(parse(text = missing.codes))
  }
  
  # Recode NA.codes values to NA.
  if (!is.null(NA.codes)) {
    if (!requireNamespace("car", quietly = TRUE)) {
      stop("Package 'car' is required to recode NA.codes.")
    }
    if (length(NA.codes) == 1 && substr(NA.codes, 1, 1) != '"') {
      # Wrap NA.codes in quotation marks.  car::recode() needs this.
      NA.codes <- paste0('"', NA.codes, '"')
    }
    recode.statement <- paste0(NA.codes, "=NA")
    x <- car::recode(x, recode.statement)
  }
  
  # Decide what to tabulate by: year alone, or year crossed with education.
  if (show.educ) {
    mid.educ <- get(mid.educ.varname, envir = envir)
    college  <- get("college", envir = envir)
    # Later assignments override earlier ones, so the order matters:
    # 'lo' = not mid.educ, 'med' = mid.educ but not college, 'hi' = college.
    # Positions where the conditions are NA stay NA and are dropped by
    # tapply().
    educ.tri <- rep(NA_character_, length(mid.educ))
    educ.tri[!mid.educ]           <- 'lo'
    educ.tri[mid.educ & !college] <- 'med'
    educ.tri[college]             <- 'hi'
    educ.tri <- ordered(educ.tri, levels = c('lo', 'med', 'hi'))
    groups   <- list(year.fac, educ.tri)
  } else {
    groups <- year.fac
  }
  
  # Choose the per-cell summary once, rather than repeating the tapply()
  # call for every combination of options.
  if (is.null(missing.codes)) {
    prop.missing <- function (v) sum(is.na(v)) / length(v)
  } else {
    prop.missing <- function (v) sum(v %in% missing.codes) / sum(!is.na(v))
  }
  
  round(tapply(x, groups, prop.missing), 2)
}




